library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.4.3
## Warning: package 'readr' was built under R version 3.4.3
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ggplot2)
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.3
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rworldmap)
## Warning: package 'rworldmap' was built under R version 3.4.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.4.3
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.3
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(DT)
## Warning: package 'DT' was built under R version 3.4.3
Until recent decades, the Winter Olympics has been dominated by European and North American countries.
#import data from winter.csv
winter <- read.csv("winter.csv", stringsAsFactors = FALSE)
#import data from dictionary.csv
dict <- read.csv("dictionary.csv", stringsAsFactors = FALSE)
#Replace Soviet Union with Russia.
winter[winter$Country == "URS", "Country"] <- 'RUS'
#Replace East and West Germany with Germany
winter[winter$Country == "EUA", "Country"] <- "GER"
winter[winter$Country == "FRG", "Country"] <- "GER"
winter[winter$Country == "GDR", "Country"] <- "GER"
Taking a look at the tables below, we can see that the number of medals attained from countries outside Europe and North America is minimal in comparison.
#Find total number of each medal
medals <- c(winter$Country, winter$Medal)
m = data.frame(cbind(do.call('rbind', strsplit(winter$Country, " ")), winter$Medal))
m <- as.data.frame(table(m$X1, m$X2))
colnames(m) <- c("Country", "Medal", "Total")
#find number of competitions each country medaled in
n = data.frame(cbind(do.call('rbind', strsplit(winter$Country, " ")), winter$Year))
n <- as.data.frame(table(n$X1, n$X2))
n <- n[n$Freq > 0,]
n <- n$Var1
n <- as.data.frame(table(n))
colnames(n) <- c("Country", "Medaled")
datatable(m, options= list(), rownames = FALSE, )
datatable(n, options= list(), rownames = FALSE)
In the barchart below, we have grouped together the best performing countries throughout the history of the Winter Olympics (Canada, Germany, Norway, Russia/Soviet Union, Sweden, and the USA) with the best performing countries outside the European and North American continent.
most <- m
colnames(most) <- c("Country", "Medal", "Total")
most <- most %>%
arrange(-Total, Medal) %>% # sort dataframe
mutate(name_factor = factor(Country, unique(Country))) # Create a factor.
#change ggplot behavior and center align all titles
theme_update(plot.title = element_text(hjust = 0.5))
#plot chosen countries
plot1 <- ggplot(subset(most, Country %in% c('USA', 'CAN', 'NOR', 'SWE', 'GER', 'RUS', 'KOR', 'CHN', 'JPN')),
aes(x= Country, y= Total, fill=Medal)) + geom_bar(position = "dodge",stat ='identity')+
scale_fill_manual(values=c("#cd7f32", "gold", 'grey')) + ggtitle("Comparison of total medals")
plot1
The difference in total medals is enormous. None of the countries outside the European and North American continent come even close to any of the best performing countries. South Korea, the non-European and non-North American country with most gold medals, has roughly less than a third of the gold medals Norway has. Despite the difference in total medals, over the last few decades, performance and participation of non-European and non-North American countries has steadily been improving. This is especially the case for East Asian countries like South Korea, Japan, and China. Below we have plotted a time series graph to show the performances of these countries over time.
#filter out years before 1952
years <- c()
i <- 0
for (year in 1949:1993){
if (i < 3){
i <- i + 1
}
else{
i <- 0
years <- c(years, year)
}
}
for (year in 1992:2016){
if (i < 3){
i <- i + 1
}
else{
i <- 0
years <- c(years, year)
}
}
#Total number of medals won each year by country
medal_count <- as.data.frame(table(winter$Country, winter$Medal, winter$Year))
colnames(medal_count) <- c("Country", "Medal", "Year", "Total")
medal_count <- aggregate.data.frame(medal_count$Total, by= list(medal_count$Country, medal_count$Year), FUN=sum)
colnames(medal_count) <- c("Country", "Year", "Total")
medal_temp <-medal_count
medal_count <- subset(medal_count, medal_count$Year %in% years)
#plot data
plot2 <- ggplot(subset(medal_count, Country %in% c('USA', 'JPN', 'NOR', 'SWE', "KOR", "CHN", "FIN", "FRA", "AUS"))) + geom_line(aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + scale_y_continuous(trans='log2')
plot2 + ggtitle("Number of total medals over time")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Width not defined. Set with `position_dodge(width = ?)`
upd <- subset(medal_count, medal_count$Country %in%c('JPN',"KOR", "CHN"))
ggplot() +
# draw the original data series with grey
geom_line(aes(Year, Total, group = Country), data = subset(medal_count, Country %in% c('USA', 'JPN', 'NOR', 'SWE', "KOR", "CHN", "FIN", "FRA","AUS")), color = alpha("grey", 0.7)) +
# color filtered data
# logartihmic graph to emphasize on the rate of growth
geom_line(aes(Year, Total, color = Country, group = Country), data = upd)+ scale_y_continuous(trans='log2') + ggtitle("Improvement of performance for East Asian Countries")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
These two plots depict the number of medals each country has won each year. Although there is an overall increasing trend in the number of medals(due to the additional sports that get added in each competition), by taking a look at the “Improvement of performance for East Asian Countries” plot, it is easily seen that there has been a steep growth in performance for the three East Asian countries starting from 1992.
med <- str_split(winter$Medal, ",")
winter$Medal<- do.call(rbind, med)[,1]
winter$Medal <- str_trim(winter$Medal, side="both")
rm(med)
table(winter$Medal)
##
## Bronze Gold Silver
## 1919 1921 1930
golden <- winter[winter$Medal == "Gold",]
golden <- as.data.frame(table(golden$Country, golden$Medal))
Below, we have plotted the total number of gold medals won by each country on a map.
gdp <- data.frame(dict$Code, dict$GDP.per.Capita)
colnames(gdp) <- c("Country", "GDP")
pop <- data.frame(dict$Code, dict$Population)
colnames(pop) <- c("Country", "Population")
#change GER to DEU to color Germany on map
temp <- most
most<-most[most$Medal=="Gold",]
most[, "Country"] <- sapply(most[, "Country"], as.character)
most$Country[most$Country == "GER"] <- "DEU"
country <- most$Country
data <- most$Total
dF <- data.frame(country=country, data=data)
#join data to a map to create a spatialPolygonsDataFrame (from StackOverflow)
sPDF <- joinCountryData2Map(dF, joinCode='NAME', nameJoinColumn='country')
## 31 codes from your data successfully matched countries in the map
## 10 codes from your data failed to match with a country code in the map
## 212 codes from the map weren't represented in your data
#default map (see rworldmap documentation for options e.g. catMethod, numCats, colourPalette, mapRegion)
#missingCountryCol used for NA and countries not in the join file
ma <- mapCountryData(sPDF, nameColumnToPlot='data', mapTitle = "Gold medal performance of countries", missingCountryCol='dark grey')
ma
## $colourVector
## [1] "#FFFF80FF" "#FFFF00FF" "#FFCC00FF" "#FF9900FF" "#FF6600FF" "#FF3300FF"
## [7] "#FF0000FF" "dark grey"
##
## $cutVector
## 0% 14.28571% 28.57143% 42.85714% 57.14286% 71.42857%
## 0.0000000 0.2857143 2.0000000 5.0000000 18.5714286 54.0000000
## 85.71429% 100%
## 149.8571429 344.0000000
##
## $plottedData
## [1] NA NA NA NA NA NA NA NA 5 79 NA NA 2 NA NA NA NA
## [18] NA NA 6 NA NA NA NA NA NA NA 315 NA NA 16 NA NA NA
## [35] NA NA NA NA NA NA 28 226 NA NA NA NA NA NA NA 1 4
## [52] NA 66 NA NA 36 NA 34 NA NA NA NA NA NA NA NA NA NA
## [69] NA NA NA 0 NA NA NA NA NA NA NA 58 NA NA 17 1 NA
## [86] NA NA 51 NA NA NA NA NA NA NA NA NA 0 NA NA NA NA
## [103] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 159
## [120] NA 0 NA NA NA NA NA NA 6 NA 0 NA NA NA 0 344 NA
## [137] NA NA NA NA NA NA NA NA NA NA NA 2 NA 127 NA NA NA
## [154] NA NA NA NA NA NA NA NA NA NA NA 5 NA 167 1 NA NA
## [171] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [188] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [205] NA NA NA NA NA 2 NA NA NA NA NA NA NA NA NA NA NA
## [222] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [239] NA NA NA NA NA
##
## $catMethod
## [1] "quantiles"
##
## $colourPalette
## [1] "heat"
Through this plot, we are able to easily visualize the countries that have better results in the Winter Olympics. It does not take long to realize that countries tinted in red (the ones with most medals) are all located in Europe and North America. We could theorize that geographical location (being located on the north) could be the reason for better performance, but there could be other reasons, two of them being population and GDP per Capita. It could be the case that the more people a country has, the higher chance it has to produce atheltic stars. The higher GDP per Capita a country has, the better chance people will have to buy expensive winter sports equipment and train in better facilities. Below we have plotted performace with regards to population of each country (gold medals/population).
pop[, "Country"] <- sapply(pop[, "Country"], as.character)
pop$Country[pop$Country == "GER"] <- "DEU"
m_pop <- total <- merge(pop,most,by="Country")
m_pop[, "Country"] <- sapply(m_pop[, "Country"], as.character)
m_pop$Country[m_pop$Country == "GER"] <- "DEU"
country <- m_pop$Country
data <- m_pop$Total/m_pop$Population
dF <- data.frame(country=country, data=data)
#join data to a map to create a spatialPolygonsDataFrame
sPDF <- joinCountryData2Map(dF, joinCode='NAME', nameJoinColumn='country')
## 30 codes from your data successfully matched countries in the map
## 7 codes from your data failed to match with a country code in the map
## 213 codes from the map weren't represented in your data
#default map (see rworldmap documentation for options e.g. catMethod, numCats, colourPalette, mapRegion)
#missingCountryCol used for NA and countries not in the join file
ma <- mapCountryData(sPDF, nameColumnToPlot='data', mapTitle = "Gold medal performance with regard to population", missingCountryCol='dark grey')
ma
## $colourVector
## [1] "#FFFF80FF" "#FFFF00FF" "#FFCC00FF" "#FF9900FF" "#FF6600FF" "#FF3300FF"
## [7] "#FF0000FF" "dark grey"
##
## $cutVector
## 0% 14.28571% 28.57143% 42.85714% 57.14286%
## 0.000000e+00 1.307913e-08 1.172748e-07 2.781694e-07 5.913467e-07
## 71.42857% 85.71429% 100%
## 2.577597e-06 9.118784e-06 5.328928e-05
##
## $plottedData
## [1] NA NA NA NA NA
## [6] NA NA NA 2.102504e-07 9.174218e-06
## [11] NA NA 1.772151e-07 NA NA
## [16] NA NA NA NA 6.307159e-07
## [21] NA NA NA NA NA
## [26] NA NA 8.786176e-06 NA NA
## [31] 1.166844e-08 NA NA NA NA
## [36] NA NA NA NA NA
## [41] 2.653722e-06 2.775964e-06 NA NA NA
## [46] NA NA NA NA 2.154324e-08
## [51] 3.048785e-06 NA 1.203937e-05 NA NA
## [56] 5.388545e-07 NA 5.219669e-07 NA NA
## [61] NA NA NA NA NA
## [66] NA NA NA NA NA
## [71] NA 0.000000e+00 NA NA NA
## [76] NA NA NA NA 9.539147e-07
## [81] NA NA 1.339021e-07 5.699913e-08 NA
## [86] NA NA 1.007566e-06 NA NA
## [91] NA NA NA NA NA
## [96] NA NA 0.000000e+00 NA NA
## [101] NA NA NA NA NA
## [106] NA NA NA NA NA
## [111] NA NA NA NA NA
## [116] NA NA NA 3.060093e-05 NA
## [121] 0.000000e+00 NA NA NA NA
## [126] NA NA 1.578968e-07 NA 0.000000e+00
## [131] NA NA NA NA 2.387284e-06
## [136] NA NA NA NA NA
## [141] NA NA NA NA NA
## [146] NA NA 3.687282e-07 NA 1.296068e-05
## [151] NA NA NA NA NA
## [156] NA NA NA NA NA
## [161] NA NA NA NA 1.106239e-07
## [166] NA 5.195713e-07 3.194939e-08 NA NA
## [171] NA NA NA NA NA
## [176] NA NA NA NA NA
## [181] NA NA NA NA NA
## [186] NA NA NA NA NA
## [191] NA NA NA NA NA
## [196] NA NA NA NA NA
## [201] NA NA NA NA NA
## [206] NA NA NA NA 5.328928e-05
## [211] NA NA NA NA NA
## [216] NA NA NA NA NA
## [221] NA NA NA NA NA
## [226] NA NA NA NA NA
## [231] NA NA NA NA NA
## [236] NA NA NA NA NA
## [241] NA NA NA
##
## $catMethod
## [1] "quantiles"
##
## $colourPalette
## [1] "heat"
Countries like the USA and Russia have been assigned a lighter color after this calculation. This means that if we were to take population in count, USA and Russia have not performed as well as Canada or Germany. Instead, South Korea is up to these countries’ level. The plot below takes GDP per Capita into account (gold medals/GDP per capita).
gdp[, "Country"] <- sapply(gdp[, "Country"], as.character)
gdp$Country[gdp$Country == "GER"] <- "DEU"
m_gdp <- total <- merge(gdp,most,by="Country")
m_gdp[, "Country"] <- sapply(m_gdp[, "Country"], as.character)
m_gdp$Country[m_gdp$Country == "GER"] <- "DEU"
country <- m_gdp$Country
data <- m_gdp$Total/m_gdp$GDP
dF <- data.frame(country=country, data=data)
#join data to a map to create a spatialPolygonsDataFrame
sPDF <- joinCountryData2Map(dF, joinCode='NAME', nameJoinColumn='country')
## 30 codes from your data successfully matched countries in the map
## 7 codes from your data failed to match with a country code in the map
## 213 codes from the map weren't represented in your data
#default map (see rworldmap documentation for options e.g. catMethod, numCats, colourPalette, mapRegion)
#missingCountryCol used for NA and countries not in the join file
ma <- mapCountryData(sPDF, nameColumnToPlot='data', mapTitle = "Gold medal performance with regard to GDP per capita", missingCountryCol='dark grey')
ma
## $colourVector
## [1] "#FFFF80FF" "#FFFF00FF" "#FFCC00FF" "#FF9900FF" "#FF6600FF" "#FF3300FF"
## [7] "#FF0000FF" "dark grey"
##
## $cutVector
## 0% 14.28571% 28.57143% 42.85714% 57.14286%
## 0.000000e+00 4.804309e-05 2.024221e-04 6.671398e-04 1.575184e-03
## 71.42857% 85.71429% 100%
## 1.952355e-03 2.577333e-03 3.783304e-02
##
## $plottedData
## [1] NA NA NA NA NA
## [6] NA NA NA 8.879266e-05 1.804684e-03
## [11] NA NA 4.959822e-05 NA NA
## [16] NA NA NA NA 1.045213e-03
## [21] NA NA NA NA NA
## [26] NA NA 7.283485e-03 NA NA
## [31] 1.993103e-03 NA NA NA NA
## [36] NA NA NA NA NA
## [41] 1.595593e-03 5.470391e-03 NA NA NA
## [46] NA NA NA NA 3.871230e-05
## [51] 2.336653e-04 NA 1.559877e-03 NA NA
## [56] 9.943222e-04 NA 7.749116e-04 NA NA
## [61] NA NA NA NA NA
## [66] NA NA NA NA NA
## [71] NA 0.000000e+00 NA NA NA
## [76] NA NA NA NA 1.936056e-03
## [81] NA NA 5.234439e-04 9.514765e-05 NA
## [86] NA NA 1.873517e-03 NA NA
## [91] NA NA NA NA NA
## [96] NA NA 0.000000e+00 NA NA
## [101] NA NA NA NA NA
## [106] NA NA NA NA NA
## [111] NA NA NA NA NA
## [116] NA NA NA 2.137086e-03 NA
## [121] 0.000000e+00 NA NA NA NA
## [126] NA NA 4.779145e-04 NA NA
## [131] NA NA NA NA 3.783304e-02
## [136] NA NA NA NA NA
## [141] NA NA NA NA NA
## [146] NA NA 1.243141e-04 NA 2.510890e-03
## [151] NA NA NA NA NA
## [156] NA NA NA NA NA
## [161] NA NA NA NA 2.364117e-03
## [166] NA 2.975993e-03 4.690277e-04 NA NA
## [171] NA NA NA NA NA
## [176] NA NA NA NA NA
## [181] NA NA NA NA NA
## [186] NA NA NA NA NA
## [191] NA NA NA NA NA
## [196] NA NA NA NA NA
## [201] NA NA NA NA NA
## [206] NA NA NA NA NA
## [211] NA NA NA NA NA
## [216] NA NA NA NA NA
## [221] NA NA NA NA NA
## [226] NA NA NA NA NA
## [231] NA NA NA NA NA
## [236] NA NA NA NA NA
## [241] NA NA NA
##
## $catMethod
## [1] "quantiles"
##
## $colourPalette
## [1] "heat"
The USA and Russia are back to red, but Finland has dropped to orange. In fact, according to this calculation, its performance is not as good as China’s or South Korea’s performance. China has been bumped up to be on par with all the other top performing countries. In the past, European and North American countries were better off economically than most other countries. As a result, they may have had better training, facilities, and equipment for players. However, East Asian countires have had massive economic growth over the past few decades, and now have access to better resources as well. This may be able to partially explain the progress of East Asian countires at the Winter Olypics. As a result of having economically advanced over the past decades, China and South Korea have acquired the rights to host the Winter Olympics. In 2018, the Winter Olympics will be held in Pyeonchang South Korea and in 2022 it will be held in Beijing, China. This is the first time two Winter Olympics will be consecutively held in Asia. We are curious to see if holding these events at home ground will further boost the performance of these countries. Below we have plotted a time-series graph that shows the number of medals each country has one by year divided by the total number of medals available to accurately plot the performance of a country at each event. We have plotted graphs for countries that have held the Winter Olympics at least twice.
library(rvest)
library(stringr)
wiki_hosts <- read_html("https://en.wikipedia.org/wiki/Winter_Olympic_Games")
hosts <- html_table(html_nodes(wiki_hosts, "table")[[5]], fill=TRUE)
hosts <- hosts[-1,1:3]
hosts$city <- str_split_fixed(hosts$Host, n=2, ",")[,1]
hosts$country <- str_split_fixed(hosts$Host, n=2, ",")[,2]
finished_games <- head(hosts,-2)
#USA, France, Austria, Canada, Japan, Italy, Norway, Switzerland
#extract relevant details to a new table
tpy <- as.data.frame(table(winter$Country, winter$Medal, winter$Year))
colnames(tpy) <- c("Country", "Medal", "Year", "Freq")
tpy <- aggregate.data.frame(tpy$Freq, by= list(tpy$Year), FUN=sum)
colnames(tpy) <- c("Year", "Freq")
medal_year <- medal_temp
#divide number of medals by number of total medals available per year to see relative performance
v <- cbind(medal_year[1], medal_year[-1] / tpy[match(medal_year$Year, tpy$Year), -1])
v$Year <- medal_year$Year
medal_year <- v
#Performance of the US
usa_years <- finished_games$Year[finished_games$country == " United States"]
plot1 <- ggplot() + geom_line(data=subset(medal_year, Country %in% c('USA')), aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + theme(axis.text.x = element_text(angle=90, size=rel(0.8), hjust=1)) +
geom_point(data=subset(subset(medal_year, Year %in% usa_years), Country %in% c('USA')), aes(x=Year, y=Total, color=Country, group=Country))
plot1 + ggtitle("Yearly Performance of the USA")
#Performance of France
fra_years <- finished_games$Year[finished_games$country == " France"]
plotf <- ggplot() + geom_line(data=subset(medal_year, Country %in% c('FRA')), aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + theme(axis.text.x = element_text(angle=90, size=rel(0.8), hjust=1)) +
geom_point(data=subset(subset(medal_year, Year %in% fra_years), Country %in% c('FRA')), aes(x=Year, y=Total, color=Country, group=Country))
plotf + ggtitle("Yearly Performance of France")
#Performance of Canada
can_years <- finished_games$Year[finished_games$country == " Canada"]
plotc <- ggplot() + geom_line(data=subset(medal_year, Country %in% c('CAN')), aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + theme(axis.text.x = element_text(angle=90, size=rel(0.8), hjust=1)) +
geom_point(data=subset(subset(medal_year, Year %in% can_years), Country %in% c('CAN')), aes(x=Year, y=Total, color=Country, group=Country))
plotc + ggtitle("Yearly Performance of Canada")
#Performance of Italy
ita_years <- finished_games$Year[finished_games$country == " Italy"]
ploti <- ggplot() + geom_line(data=subset(medal_year, Country %in% c('ITA')), aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + theme(axis.text.x = element_text(angle=90, size=rel(0.8), hjust=1)) +
geom_point(data=subset(subset(medal_year, Year %in% ita_years), Country %in% c('ITA')), aes(x=Year, y=Total, color=Country, group=Country))
ploti + ggtitle("Yearly Performance of Italy")
#Performance of Austria
aut_years <- finished_games$Year[finished_games$country == " Austria"]
plota <- ggplot() + geom_line(data=subset(medal_year, Country %in% c('AUT')), aes(x= Year, y= Total, color=Country, group=Country),position = "dodge",stat ='identity') + theme(axis.text.x = element_text(angle=90, size=rel(0.8), hjust=1)) +
geom_point(data=subset(subset(medal_year, Year %in%aut_years), Country %in% c('AUT')), aes(x=Year, y=Total, color=Country, group=Country))
plota + ggtitle("Yearly Performance of Austria")
#local max
The points on the line show the year the respective country hosted the Winter Olympics. All countries with the exception of Italy had their best performing year at their own home ground. Moreover, there is also at least a small spike of performance for each country whenever they its hosting the event at its home country (with the exception of Canada at 1988). From this, we can deduce that South Korea and China will possibly perform better when hosting the Winter Olympics.
As seen in the “Improvement of performance for East Asian Countries” plot, Asian countries have started to perform drastically better starting from 1992. We want to see the reason to this. We will pay special attention to South Korea as it has won the most gold medals.
kor_sports <- as.data.frame(table(winter$Discipline, winter$Country, winter$Medal, winter$Year))
#Get relevant values for Korea
colnames(kor_sports) <- c("Discipline", "Country", "Medal", "Year", "Number")
kor_sports <- kor_sports[kor_sports$Country == "KOR",]
kor_sports <- kor_sports[kor_sports$Number > 0,]
plotk <- ggplot(data=kor_sports, aes(x=Year, y=Number, fill=Discipline)) +
geom_bar(stat="identity")
#hovering the mouse will allow users to see exact number of medals for each discipline
ggplotly(plotk)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
From the barchart above, it is obvious that South Korea has attained most of its medals from Short Track Speed Skating. It turns out that this discipline has been added to the Winter Olympics starting from 1992, and is the reason why South Korea has had a sudden spike in performance. However, as the year progresses, South Korea is starting to acquire medals from other discplines as well.
Below, we have plotted another bar chart to compare the performances with the best winter atheltes of all time with the best East Asian winter athlete.
ath <- as.data.frame(table(winter$Country, winter$Athlete, winter$Medal))
ath <- ath[order(ath$Freq, decreasing = TRUE), ]
#Look at number of Gold medals of athelete and extract if more than or equal to 5
best_gold <- ath[ath$Var3 == "Gold",]
bg_select <- best_gold[best_gold$Freq >= 5,]
names <- bg_select$Var2
#add Korean best athlete for comparison
names[10] <- "CHUN, Lee-Kyung"
colnames(ath) <- c('Country','Name', 'Medal', 'Total')
plotn <- ggplot(subset(ath, Name %in% names),
aes(x= Name, y= Total, fill=Medal, label=Country)) + geom_bar(position = "dodge",stat ='identity')+
scale_fill_manual(values=c("#cd7f32", "gold", 'grey'))+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
#hover over bars to see number of medals and country of origin of a player
ggplotly(plotn)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
At an individual scale, the best East Asian athlete (Lee-Kyung Chun) does not seem to trail too much in comparison to the best athletes of all time. Ahn Hyun Soo (Viktor An) has won three gold medals and a bronze medal for South Korea, changed his nationality to Russian, and has another three gold medals and a bronze medals. If we are able to count the performance of this athlete as an individual East Asian athlete, he would have six gold medals and two bronze medals, making him one of the best winter athletes of all time.
Although European and North American countries still dominate the Winter Olympics, Asian countries have relatively recently started to immerse themselves in the competition. Since 1992, their results have been improving, and as time progresses, somday they might be able to catch up with other Winter Olympic Giants.
Below we have provided a comprehensive table with all East Asian atheltes that have won medals in the Winter Olympics for those who had like to further explore East Asian performance at the Winter Olympics.
#create table for East Asian countries
asia_table <- winter[winter$Country %in% c("KOR", "JPN", "CHN"),]
rownames(asia_table) <- NULL
datatable(asia_table, options= list(), rownames = FALSE)
```